Consumption Model for a Month

Assumption: consumption = func(individual components, future weather, future humidity)

Get Data

result = query_api.query_data_frame(org=org, query=query)
result.columns
Index(['result', 'table', '_start', '_stop', '_time', '_value', '_field',
       '_measurement', 'device', 'sensor', 'units'],
      dtype='object')
result.shape
(4182, 11)

result.to_csv(f'./Data/energy_info.csv', index=False) #save the data as a csv

result.drop(['result', 'table', '_field', '_start', '_stop', '_measurement', 'units', 'device'], axis=1, inplace=True)
result.head()
_time _value sensor
0 2023-03-02 00:00:00+00:00 12.888000 Aircon
1 2023-03-03 00:00:00+00:00 121.541007 Aircon
2 2023-03-04 00:00:00+00:00 222.993806 Aircon
3 2023-03-05 00:00:00+00:00 118.435757 Aircon
4 2023-03-06 00:00:00+00:00 238.721389 Aircon
result.columns
Index(['_time', '_value', 'sensor'], dtype='object')

Energy in Wide Form

wide_df = result.pivot(index='_time', columns='sensor', values='_value')
# Reset the index to make '_time' a regular column
wide_df.reset_index(inplace=True)

wide_df.columns
Index(['_time', 'Aircon', 'Consumption', 'ConsumptionNet', 'Export', 'Fridge',
       'Garage', 'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR',
       'HouseBoardR', 'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints',
       'Production', 'Unmonitored'],
      dtype='object', name='sensor')
wide_df['_time'] = pd.to_datetime(wide_df['_time']).dt.tz_localize(None)
wide_df.drop(['Unmonitored'], axis = 1, inplace=True)
wide_df.rename(columns={'_time':'ds'}, inplace=True)
# Delete the last row
wide_df = wide_df.drop(wide_df.index[-1])
wide_df_new = pd.DataFrame()
for column in wide_df.columns:
    wide_df_new[f'{column}'] = wide_df[f'{column}'].interpolate()

wide_df = pd.DataFrame(wide_df_new)
wide_df.columns
Index(['ds', 'Aircon', 'Consumption', 'ConsumptionNet', 'Export', 'Fridge',
       'Garage', 'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR',
       'HouseBoardR', 'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints',
       'Production'],
      dtype='object')
wide_df.tail()
ds Aircon Consumption ConsumptionNet Export Fridge Garage Hotwater HouseBoardOutside HouseBoardOutsideR HouseBoardR Import Lights Net OvenStove Powerpoints Production
240 2023-10-28 8.578005 357.275503 866.654129 180.242247 25.492343 24.017087 0.530865 999.775552 333.258545 366.408449 71.876417 80.009421 -108.365830 66.086281 176.392548 491.748285
241 2023-10-29 8.661682 179.501779 866.654129 806.610093 19.634587 2.731102 0.192283 530.312474 176.770805 201.397413 1.974071 60.108551 -804.636021 1.253300 103.451046 1017.375874
242 2023-10-30 8.603354 263.812291 866.654129 757.601472 27.081000 0.983716 0.183639 788.486015 262.828480 290.616882 3.378339 66.827993 -754.223132 1.356785 178.170521 1052.272015
243 2023-10-31 9.032917 311.552394 866.654129 803.841970 35.649472 11.163953 0.269313 901.165305 300.388379 319.795146 5.027126 67.972347 -798.814844 30.217549 166.649785 1142.661342
244 2023-11-01 8.743938 211.339011 866.654129 898.594991 30.389375 3.655479 0.189410 623.050504 207.683462 245.486118 2.309883 50.624125 -896.285107 0.213375 145.941312 1134.978184
def data_scaler(df):
    """Min-max scale every numeric column of *df* into [0, 1], in place.

    Equivalent to sklearn's ``MinMaxScaler`` with the default feature
    range — ``(x - min) / (max - min)`` per column — but implemented with
    pandas only, so the heavy sklearn dependency (previously imported
    inside the function) is no longer needed. Constant columns are mapped
    to 0.0, matching MinMaxScaler's zero-range handling, instead of
    dividing by zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to scale. NOTE: the numeric columns are overwritten in
        place (the frame is also returned, so existing call sites like
        ``wide_df = data_scaler(wide_df)`` keep working).

    Returns
    -------
    pandas.DataFrame
        The same frame, with its numeric columns scaled.
    """
    numeric_columns = df.select_dtypes(include=['number']).columns
    if len(numeric_columns) == 0:
        return df  # nothing numeric to scale

    numeric = df[numeric_columns]
    col_min = numeric.min()
    # Replace a zero range with 1 so constant columns become 0.0
    # (same result MinMaxScaler produces) rather than NaN/inf.
    col_range = (numeric.max() - col_min).replace(0, 1)
    df[numeric_columns] = (numeric - col_min) / col_range

    return df
def long_form(df_plot):
    """Melt a wide frame into (ds, unique_id, y) long form for plotting."""
    melted = df_plot.melt(
        id_vars='ds',
        var_name='unique_id',
        value_name='y',
    )
    return melted

def altair_plot(df_plot): 
    """Build an interactive Altair line chart of long-form (ds, unique_id, y)
    data: hovering near a line or clicking its legend entry highlights it."""
    import altair as alt

    # Selection keyed on the series id, driven by mouseover or the legend.
    highlight = alt.selection_point(on='mouseover', fields=['unique_id'], bind='legend', nearest=True)

    # Shared encodings: time on x, value on y, one color per series.
    base = alt.Chart(df_plot).encode(
        x='ds:T',
        y='y:Q',
        color='unique_id:N'
    )

    # Invisible circles carry the hover selection (nearest-point capture).
    hover_points = base.mark_circle().encode(opacity=alt.value(0))
    hover_points = hover_points.add_params(highlight).properties(width=1000)

    # Lines thicken from 1px to 3px when their series is selected.
    series_lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3))
    )

    return hover_points + series_lines
wide_df = data_scaler(wide_df)
wide_df.tail()
ds Aircon Consumption ConsumptionNet Export Fridge Garage Hotwater HouseBoardOutside HouseBoardOutsideR HouseBoardR Import Lights Net OvenStove Powerpoints Production
240 2023-10-28 0.000327 0.295413 0.639383 0.087269 0.054518 0.475787 0.000988 0.441317 0.270647 0.263152 0.098826 0.397064 0.702175 0.217335 0.451213 0.164710
241 2023-10-29 0.000459 0.096052 0.639383 0.390543 0.008991 0.049113 0.000167 0.155476 0.095349 0.096613 0.001842 0.287229 0.452268 0.003559 0.195780 0.385757
242 2023-10-30 0.000367 0.190600 0.639383 0.366814 0.066865 0.014087 0.000146 0.312670 0.191751 0.186659 0.003790 0.324314 0.470363 0.003900 0.457440 0.400433
243 2023-10-31 0.001045 0.244137 0.639383 0.389203 0.133461 0.218148 0.000354 0.381277 0.233826 0.216107 0.006078 0.330630 0.454358 0.099064 0.417095 0.438445
244 2023-11-01 0.000589 0.131755 0.639383 0.435080 0.092579 0.067642 0.000160 0.211942 0.129978 0.141110 0.002308 0.234883 0.419373 0.000130 0.344577 0.435214
df = long_form(wide_df)
wide_df.rename(columns={'Consumption': 'y'}, inplace=True)
wide_df['unique_id'] = 'Consumption'
df.head()
ds unique_id y
0 2023-03-02 Aircon 0.007134
1 2023-03-03 Aircon 0.178748
2 2023-03-04 Aircon 0.338990
3 2023-03-05 Aircon 0.173844
4 2023-03-06 Aircon 0.363831

Weather data

import pandas as pd
path = 'Data'
name = 'weather'

weather_df = pd.read_csv(f'{path}/{name}.csv')
weather_df.head()
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
0 2023-01-01 00:00 21.6 0 12.6 126 1013.0 0.0 84 73 21.6 21.6 24.4 18.7 0 10.0 19.8 1.0
1 2023-01-01 01:00 21.3 0 11.9 133 1013.0 0.0 85 77 21.3 21.3 24.3 18.7 0 10.0 18.7 1.0
2 2023-01-01 02:00 21.1 0 11.2 140 1012.0 0.0 86 80 21.1 21.1 24.2 18.7 0 10.0 17.6 1.0
3 2023-01-01 03:00 20.8 0 10.4 147 1012.0 0.0 88 84 20.8 20.8 20.8 18.7 0 10.0 16.6 1.0
4 2023-01-01 04:00 21.0 0 10.6 144 1012.0 0.0 86 84 21.0 21.0 21.0 18.6 0 10.0 16.3 1.0
weather_df = data_scaler(weather_df)
def data_day_avg(weather_df):
    """Collapse hourly weather rows into one row of daily means.

    Groups rows by the calendar date of the 'ds' column and averages every
    other column. Replaces the original per-date loop, which re-parsed the
    whole 'ds' column on every iteration and re-concatenated the result
    frame each time (quadratic overall), and whose ``tqdm.notebook`` import
    fails outside Jupyter.

    Parameters
    ----------
    weather_df : pandas.DataFrame
        Hourly data with a parseable 'ds' timestamp column; all other
        columns are assumed numeric (true here after ``data_scaler``).

    Returns
    -------
    pandas.DataFrame
        One row per date with a fresh RangeIndex: 'ds' first, holding
        ``datetime.date`` objects (as before), then the per-day column
        means, dates kept in order of first appearance.
    """
    # Series of calendar dates, named 'ds' so reset_index() below
    # restores the original output column name.
    day = pd.to_datetime(weather_df['ds']).dt.date.rename('ds')
    avg_df = (
        weather_df.drop(columns='ds')
        .groupby(day, sort=False)  # keep first-appearance date order
        .mean()
        .reset_index()
    )
    return avg_df
weather_avg_df = data_day_avg(weather_df)
weather_avg_df['ds'] = pd.to_datetime(weather_avg_df['ds'])
weather_avg_df
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
0 2023-01-01 0.566503 0.583333 0.459098 0.345636 0.428728 0.000895 0.670886 0.583750 0.516791 0.595085 0.512881 0.778121 0.0 1.000000 0.434678 0.422619
1 2023-01-02 0.554902 0.583333 0.451453 0.396936 0.359649 0.001253 0.602848 0.409167 0.499005 0.584249 0.486067 0.725707 0.0 1.000000 0.434844 0.428571
2 2023-01-03 0.582516 0.583333 0.263889 0.415506 0.258772 0.000000 0.584916 0.287917 0.525746 0.610043 0.502366 0.738663 0.0 1.000000 0.283533 0.452381
3 2023-01-04 0.650490 0.583333 0.313073 0.382660 0.163377 0.000895 0.687764 0.196250 0.608582 0.673535 0.609621 0.866902 0.0 0.994792 0.305777 0.500000
4 2023-01-05 0.654575 0.583333 0.257518 0.506267 0.166667 0.026859 0.735232 0.700417 0.629726 0.677350 0.608701 0.911808 0.0 0.989583 0.288098 0.416667
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
339 2023-12-06 0.580556 0.583333 0.254077 0.264740 0.558114 0.000000 0.608650 0.174583 0.519527 0.608364 0.515116 0.745878 0.0 1.000000 0.214558 0.470238
340 2023-12-07 0.605065 0.583333 0.266565 0.202066 0.565789 0.000000 0.582806 0.050000 0.543532 0.631258 0.533386 0.748233 0.0 1.000000 0.222693 0.476190
341 2023-12-08 0.618954 0.583333 0.349261 0.251625 0.575658 0.000018 0.626582 0.342083 0.561816 0.644078 0.555205 0.795200 0.0 1.000000 0.285940 0.476190
342 2023-12-09 0.612582 0.583333 0.362768 0.190460 0.544956 0.000107 0.577532 0.497500 0.564179 0.638584 0.539432 0.757509 0.0 1.000000 0.308267 0.428571
343 2023-12-10 0.630556 0.583333 0.513507 0.233751 0.530702 0.000233 0.515295 0.537500 0.571269 0.655067 0.546924 0.730271 0.0 1.000000 0.426627 0.452381

344 rows × 17 columns

(
    wide_df[['y']].plot(title='Consumption')
)
<Axes: title={'center': 'Consumption'}>

from statsforecast import StatsForecast
/home/ben/mambaforge/envs/cfast/lib/python3.11/site-packages/statsforecast/core.py:25: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
  from tqdm.autonotebook import tqdm
from pylab import rcParams
rcParams['figure.figsize'] = (15,4)
StatsForecast.plot(df, engine='plotly')
/home/ben/mambaforge/envs/cfast/lib/python3.11/site-packages/_plotly_utils/basevalidators.py:105: FutureWarning:

The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result
wide_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   ds                  245 non-null    datetime64[ns]
 1   Aircon              245 non-null    float64       
 2   y                   245 non-null    float64       
 3   ConsumptionNet      198 non-null    float64       
 4   Export              245 non-null    float64       
 5   Fridge              245 non-null    float64       
 6   Garage              245 non-null    float64       
 7   Hotwater            245 non-null    float64       
 8   HouseBoardOutside   161 non-null    float64       
 9   HouseBoardOutsideR  245 non-null    float64       
 10  HouseBoardR         245 non-null    float64       
 11  Import              245 non-null    float64       
 12  Lights              245 non-null    float64       
 13  Net                 245 non-null    float64       
 14  OvenStove           245 non-null    float64       
 15  Powerpoints         245 non-null    float64       
 16  Production          245 non-null    float64       
 17  unique_id           245 non-null    object        
dtypes: datetime64[ns](1), float64(16), object(1)
memory usage: 34.6+ KB
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3920 entries, 0 to 3919
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   ds         3920 non-null   datetime64[ns]
 1   unique_id  3920 non-null   object        
 2   y          3789 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 92.0+ KB
weather_avg_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   ds              344 non-null    datetime64[ns]
 1   temp_c          344 non-null    float64       
 2   is_day          344 non-null    float64       
 3   wind_kph        344 non-null    float64       
 4   wind_degree     344 non-null    float64       
 5   pressure_mb     344 non-null    float64       
 6   precip_mm       344 non-null    float64       
 7   humidity        344 non-null    float64       
 8   cloud           344 non-null    float64       
 9   feelslike_c     344 non-null    float64       
 10  windchill_c     344 non-null    float64       
 11  heatindex_c     344 non-null    float64       
 12  dewpoint_c      344 non-null    float64       
 13  chance_of_rain  344 non-null    float64       
 14  vis_km          344 non-null    float64       
 15  gust_kph        344 non-null    float64       
 16  uv              344 non-null    float64       
dtypes: datetime64[ns](1), float64(16)
memory usage: 45.8 KB
date_range_start, date_range_end  = wide_df['ds'].min(), wide_df['ds'].max()
# Filter rows between the start and end dates
weather_avg_filtered_df = weather_avg_df[(weather_avg_df['ds'] >= date_range_start) & (weather_avg_df['ds'] <= date_range_end)]
weather_avg_filtered_df
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
60 2023-03-02 0.644444 0.541667 0.294980 0.153784 0.344298 0.001056 0.675633 0.345417 0.609826 0.667888 0.594374 0.845112 0.0 1.0 0.250332 0.470238
61 2023-03-03 0.603268 0.541667 0.483563 0.386142 0.403509 0.002131 0.682489 0.579167 0.572388 0.629426 0.548107 0.821555 0.0 1.0 0.432022 0.380952
62 2023-03-04 0.561765 0.541667 0.574669 0.433612 0.425439 0.000161 0.604430 0.364583 0.509204 0.590659 0.491982 0.734982 0.0 1.0 0.485309 0.422619
63 2023-03-05 0.598039 0.541667 0.372324 0.350627 0.427632 0.000358 0.603903 0.401250 0.542537 0.624542 0.531677 0.761042 0.0 1.0 0.319804 0.428571
64 2023-03-06 0.629575 0.541667 0.313073 0.290854 0.417763 0.000358 0.608650 0.157500 0.568284 0.653999 0.557965 0.785925 0.0 1.0 0.263363 0.476190
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
300 2023-10-28 0.379575 0.541667 0.700051 0.446611 0.708333 0.000233 0.500000 0.389167 0.351741 0.414835 0.327681 0.497939 0.0 1.0 0.588811 0.345238
301 2023-10-29 0.382516 0.541667 0.383028 0.320682 0.656798 0.000125 0.524789 0.433750 0.347637 0.423230 0.329390 0.525324 0.0 1.0 0.328934 0.345238
302 2023-10-30 0.441340 0.583333 0.396152 0.435237 0.562500 0.000000 0.583861 0.181250 0.403483 0.478327 0.373686 0.611307 0.0 1.0 0.340139 0.416667
303 2023-10-31 0.550163 0.583333 0.489424 0.549559 0.442982 0.000000 0.570675 0.025417 0.492537 0.579976 0.467140 0.687132 0.0 1.0 0.470286 0.464286
304 2023-11-01 0.510784 0.583333 0.561672 0.445218 0.505482 0.000340 0.599156 0.480000 0.458209 0.543040 0.435594 0.685218 0.0 1.0 0.467795 0.404762

245 rows × 17 columns

altair_plot(df)
weather_avg_df_plot = long_form(weather_avg_filtered_df)
altair_plot(weather_avg_df_plot)
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
wide_df["y"]
0      0.355157
1      0.291076
2      0.498418
3      0.418824
4      0.561599
         ...   
240    0.295413
241    0.096052
242    0.190600
243    0.244137
244    0.131755
Name: y, Length: 245, dtype: float64
fig, axs = plt.subplots(nrows=1, ncols=2)

plot_acf(wide_df["y"],  lags=28, ax=axs[0],color="fuchsia")
axs[0].set_title("Autocorrelation");

plot_pacf(wide_df["y"],  lags=28, ax=axs[1],color="lime")
axs[1].set_title('Partial Autocorrelation')

plt.show();

fig, axs = plt.subplots(nrows=1, ncols=2)

plot_acf(wide_df["y"],  lags=14, ax=axs[0],color="fuchsia")
axs[0].set_title("Autocorrelation");

plot_pacf(wide_df["y"],  lags=14, ax=axs[1],color="lime")
axs[1].set_title('Partial Autocorrelation')

plt.show();

from statsmodels.tsa.seasonal import seasonal_decompose 
a = seasonal_decompose(wide_df["y"], model = "additive", period=28)
a.plot();

Designing Models

weather_avg_filtered_df.head()
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
60 2023-03-02 0.644444 0.541667 0.294980 0.153784 0.344298 0.001056 0.675633 0.345417 0.609826 0.667888 0.594374 0.845112 0.0 1.0 0.250332 0.470238
61 2023-03-03 0.603268 0.541667 0.483563 0.386142 0.403509 0.002131 0.682489 0.579167 0.572388 0.629426 0.548107 0.821555 0.0 1.0 0.432022 0.380952
62 2023-03-04 0.561765 0.541667 0.574669 0.433612 0.425439 0.000161 0.604430 0.364583 0.509204 0.590659 0.491982 0.734982 0.0 1.0 0.485309 0.422619
63 2023-03-05 0.598039 0.541667 0.372324 0.350627 0.427632 0.000358 0.603903 0.401250 0.542537 0.624542 0.531677 0.761042 0.0 1.0 0.319804 0.428571
64 2023-03-06 0.629575 0.541667 0.313073 0.290854 0.417763 0.000358 0.608650 0.157500 0.568284 0.653999 0.557965 0.785925 0.0 1.0 0.263363 0.476190
wide_df = pd.merge(wide_df, weather_avg_filtered_df, on='ds', how='outer')
wide_df.head()
ds Aircon y ConsumptionNet Export Fridge Garage Hotwater HouseBoardOutside HouseBoardOutsideR ... humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
0 2023-03-02 0.007134 0.355157 NaN 0.016595 0.814609 0.003794 0.000287 NaN 0.356702 ... 0.675633 0.345417 0.609826 0.667888 0.594374 0.845112 0.0 1.0 0.250332 0.470238
1 2023-03-03 0.178748 0.291076 NaN 0.165011 0.669709 0.015031 0.000143 NaN 0.292065 ... 0.682489 0.579167 0.572388 0.629426 0.548107 0.821555 0.0 1.0 0.432022 0.380952
2 2023-03-04 0.338990 0.498418 NaN 0.294620 0.686509 0.059559 0.000460 NaN 0.496691 ... 0.604430 0.364583 0.509204 0.590659 0.491982 0.734982 0.0 1.0 0.485309 0.422619
3 2023-03-05 0.173844 0.418824 NaN 0.302607 0.701288 0.009916 0.000256 NaN 0.419958 ... 0.603903 0.401250 0.542537 0.624542 0.531677 0.761042 0.0 1.0 0.319804 0.428571
4 2023-03-06 0.363831 0.561599 NaN 0.381220 0.773714 0.008798 0.002112 NaN 0.562640 ... 0.608650 0.157500 0.568284 0.653999 0.557965 0.785925 0.0 1.0 0.263363 0.476190

5 rows × 34 columns

horizon = 28 * 2

train_size = len(wide_df) - horizon
train, test = wide_df[:train_size], wide_df[train_size:]


wide_df.shape, train.shape, test.shape
((245, 34), (189, 34), (56, 34))
futr_df=weather_avg_filtered_df[train_size:]
futr_df['unique_id'] = 'Consumption'
futr_df.shape
/tmp/ipykernel_1270693/3311167277.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
(56, 18)
futr_df.head()
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv unique_id
249 2023-09-07 0.430882 0.5 0.251784 0.298630 0.699561 0.000000 0.748418 0.322083 0.397637 0.468407 0.371320 0.703475 0.0 1.000000 0.223274 0.345238 Consumption
250 2023-09-08 0.433660 0.5 0.348114 0.424559 0.656798 0.001612 0.732595 0.320417 0.394776 0.471001 0.371845 0.694052 0.0 0.989583 0.321547 0.345238 Consumption
251 2023-09-09 0.338889 0.5 0.389016 0.465761 0.732456 0.000358 0.443038 0.115833 0.308706 0.378816 0.283123 0.415931 0.0 1.000000 0.370020 0.309524 Consumption
252 2023-09-10 0.292810 0.5 0.490571 0.406685 0.789474 0.000000 0.489979 0.264167 0.268159 0.329060 0.235410 0.428445 0.0 1.000000 0.432852 0.267857 Consumption
253 2023-09-11 0.326471 0.5 0.583716 0.429085 0.822368 0.000000 0.456751 0.294583 0.298881 0.366758 0.262487 0.430654 0.0 1.000000 0.511288 0.303571 Consumption
from neuralforecast import models
models.__all__
['RNN',
 'GRU',
 'LSTM',
 'TCN',
 'DeepAR',
 'DilatedRNN',
 'MLP',
 'NHITS',
 'NBEATS',
 'NBEATSx',
 'TFT',
 'VanillaTransformer',
 'Informer',
 'Autoformer',
 'PatchTST',
 'FEDformer',
 'StemGNN',
 'HINT',
 'TimesNet']
from ray import tune

from neuralforecast.core import NeuralForecast
from neuralforecast.auto import AutoMLP
from neuralforecast.models import *
model_list = [RNN,
 GRU,
 LSTM,
 TCN,
 #DeepAR,  # not good
 DilatedRNN,
 MLP,
 NHITS,
 NBEATS,
 NBEATSx,
 #TFT, # too much GPU
 #VanillaTransformer, # not good
 #Informer,
 #Autoformer,
 #PatchTST, # too much GPU
 #FEDformer, #taken too long
 #StemGNN, #need n_series
 #HINT, #need n_series
 #TimesNet # takes too long
             ]
wide_df.columns
Index(['ds', 'Aircon', 'y', 'ConsumptionNet', 'Export', 'Fridge', 'Garage',
       'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR', 'HouseBoardR',
       'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints', 'Production',
       'unique_id', 'temp_c', 'is_day', 'wind_kph', 'wind_degree',
       'pressure_mb', 'precip_mm', 'humidity', 'cloud', 'feelslike_c',
       'windchill_c', 'heatindex_c', 'dewpoint_c', 'chance_of_rain', 'vis_km',
       'gust_kph', 'uv'],
      dtype='object')
models = [model(input_size=2 * horizon,
                h=horizon,
                max_steps=50,
                hist_exog_list =['Aircon', 'Export', 'Fridge', 'Garage',
                                 'Hotwater', 
                                 'HouseBoardR','Import', 'Lights', 'Net', 'OvenStove',
                                 'Powerpoints', 'Production'
                                ],
                futr_exog_list = ['temp_c', 'is_day', 'wind_kph', 'wind_degree',
                    'pressure_mb', 'precip_mm', 'humidity', 'cloud', 'feelslike_c',
                    'windchill_c', 'heatindex_c', 'dewpoint_c', 'chance_of_rain',
                    'vis_km', 'gust_kph', 'uv']
               ) for model in model_list]
models
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
[RNN, GRU, LSTM, TCN, DilatedRNN, MLP, NHITS, NBEATS, NBEATSx]
nf = NeuralForecast(
    models=models, 
    freq='D')
train.columns
Index(['ds', 'Aircon', 'y', 'ConsumptionNet', 'Export', 'Fridge', 'Garage',
       'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR', 'HouseBoardR',
       'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints', 'Production',
       'unique_id', 'temp_c', 'is_day', 'wind_kph', 'wind_degree',
       'pressure_mb', 'precip_mm', 'humidity', 'cloud', 'feelslike_c',
       'windchill_c', 'heatindex_c', 'dewpoint_c', 'chance_of_rain', 'vis_km',
       'gust_kph', 'uv'],
      dtype='object')
nf.fit(df=train)
consumption_predict = nf.predict(futr_df=futr_df).reset_index()
consumption_predict.head()
unique_id ds RNN GRU LSTM TCN DilatedRNN MLP NHITS NBEATS NBEATSx
0 Consumption 2023-09-07 0.520451 0.558734 0.566012 0.552208 0.565061 0.398763 0.235965 0.266845 0.296138
1 Consumption 2023-09-08 0.493308 0.514300 0.395049 0.494308 0.495290 0.387929 0.224346 0.191190 0.212631
2 Consumption 2023-09-09 0.425334 0.355697 0.433629 0.400758 0.381633 0.408256 0.241371 0.233107 0.228564
3 Consumption 2023-09-10 0.443577 0.498908 0.442869 0.366124 0.378713 0.407515 0.233445 0.234206 0.226971
4 Consumption 2023-09-11 0.353572 0.434181 0.394170 0.391079 0.314454 0.401142 0.212357 0.289439 0.254854
consumption_predict.drop('unique_id', axis=1, inplace=True)

just_consumption = df[df['unique_id'] == 'Consumption']
consumption_predict_plot = long_form(consumption_predict)
consumption_predict_plot = pd.concat([consumption_predict_plot, just_consumption]
                                     , ignore_index=True)
consumption_predict_plot
ds unique_id y
0 2023-09-07 RNN 0.520451
1 2023-09-08 RNN 0.493308
2 2023-09-09 RNN 0.425334
3 2023-09-10 RNN 0.443577
4 2023-09-11 RNN 0.353572
... ... ... ...
744 2023-10-28 Consumption 0.295413
745 2023-10-29 Consumption 0.096052
746 2023-10-30 Consumption 0.190600
747 2023-10-31 Consumption 0.244137
748 2023-11-01 Consumption 0.131755

749 rows × 3 columns

altair_plot(consumption_predict_plot)

Performance

from datasetsforecast.losses import (mae, mape, mase, rmse, smape)
consumption_predict.info(), consumption_predict.columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   ds          56 non-null     datetime64[ns]
 1   RNN         56 non-null     float64       
 2   GRU         56 non-null     float64       
 3   LSTM        56 non-null     float64       
 4   TCN         56 non-null     float64       
 5   DilatedRNN  56 non-null     float64       
 6   MLP         56 non-null     float64       
 7   NHITS       56 non-null     float64       
 8   NBEATS      56 non-null     float64       
 9   NBEATSx     56 non-null     float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 4.5 KB
(None,
 Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
        'NBEATS', 'NBEATSx'],
       dtype='object'))
def evaluate_performace(y_hist, y_true, y_pred):
    """Score each model column of *y_pred* against the held-out actuals.

    Parameters
    ----------
    y_hist : pandas.DataFrame
        Training history with a 'y' column — MASE uses it as the
        in-sample series for its naive-forecast denominator.
    y_true : pandas.DataFrame
        Held-out actuals with a 'y' column, aligned row-for-row with
        *y_pred*.
    y_pred : pandas.DataFrame
        One column per model plus a 'ds' timestamp column. 'ds' is now
        skipped by name: the original ``if i < 1: continue`` skipped
        whichever column happened to come first, silently scoring 'ds'
        (or dropping a model) if the column order ever changed. The
        debug ``print`` of the columns is removed as well.

    Returns
    -------
    pandas.DataFrame
        Metrics (rows: mase/mae/mape/rmse/smape) x models (columns).
    """
    evaluation = {}
    for model in y_pred.columns:
        if model == 'ds':  # timestamp column, not a model
            continue
        evaluation[model] = {}
        for metric in [mase, mae, mape, rmse, smape]:
            metric_name = metric.__name__
            if metric_name == 'mase':
                # MASE additionally needs the training series; the
                # seasonality of 12 is kept from the original call.
                evaluation[model][metric_name] = metric(y_true['y'].values,
                                                    y_pred[model].values,
                                                    y_hist['y'].values, seasonality=12)
            else:
                evaluation[model][metric_name] = metric(y_true['y'].values, y_pred[model].values)
    return pd.DataFrame(evaluation)
score = evaluate_performace(train, test, consumption_predict)
score.style.background_gradient(cmap ='YlGn', axis = 1,low=0.5, high=0.6)
Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
       'NBEATS', 'NBEATSx'],
      dtype='object')
  RNN GRU LSTM TCN DilatedRNN MLP NHITS NBEATS NBEATSx
mase 0.708069 0.753612 0.653206 0.715462 0.683945 0.841424 0.585120 0.644987 0.540987
mae 0.116573 0.124071 0.107540 0.117790 0.112601 0.138528 0.096331 0.106187 0.089065
mape 117.972952 123.586528 92.569209 117.289240 113.036232 141.320639 63.815475 109.551444 72.312216
rmse 0.144370 0.158492 0.138631 0.148289 0.142224 0.161854 0.127389 0.136658 0.117224
smape 53.985201 55.843691 52.182055 53.097440 52.532795 59.642131 62.462127 55.998253 53.806687
score = data_scaler(score.T)
score.plot(kind='bar', legend=True)
<Axes: >

results = evaluate_performace(train, test, consumption_predict)
results.to_csv('Data/Neutral_results5.csv')
Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
       'NBEATS', 'NBEATSx'],
      dtype='object')

Cross_Validate

cv_df = nf.cross_validation(train,
                            n_windows=3,
                            step_size= 1,
                           )
cv_df.columns
Index(['unique_id', 'ds', 'cutoff', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN',
       'MLP', 'NHITS', 'NBEATS', 'NBEATSx', 'Aircon', 'y', 'ConsumptionNet',
       'Export', 'Fridge', 'Garage', 'Hotwater', 'HouseBoardOutside',
       'HouseBoardOutsideR', 'HouseBoardR', 'Import', 'Lights', 'Net',
       'OvenStove', 'Powerpoints', 'Production', 'temp_c', 'is_day',
       'wind_kph', 'wind_degree', 'pressure_mb', 'precip_mm', 'humidity',
       'cloud', 'feelslike_c', 'windchill_c', 'heatindex_c', 'dewpoint_c',
       'chance_of_rain', 'vis_km', 'gust_kph', 'uv'],
      dtype='object')
cv_df.drop('cutoff', axis=1, inplace=True)
cv_df.drop('unique_id', axis=1, inplace=True)
cv_df['RNN']
0      0.435481
1      0.571911
2      0.712023
3      0.549668
4      0.572973
         ...   
163    0.331751
164    0.253077
165    0.284961
166    0.376310
167    0.242600
Name: RNN, Length: 168, dtype: float32
consumption_predict.columns
Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
       'NBEATS', 'NBEATSx'],
      dtype='object')
cv_df_filter = cv_df[consumption_predict.columns]
cv_df_filter
ds RNN GRU LSTM TCN DilatedRNN MLP NHITS NBEATS NBEATSx
0 2023-07-11 0.435481 0.441562 0.498433 0.483206 0.500122 0.580595 0.490572 0.561005 0.524275
1 2023-07-12 0.571911 0.569870 0.579989 0.628650 0.598320 0.679515 0.452369 0.516658 0.289955
2 2023-07-13 0.712023 0.640818 0.719223 0.676610 0.605447 0.670375 0.338826 0.413797 0.194005
3 2023-07-14 0.549668 0.612540 0.583302 0.630741 0.591179 0.687667 0.341010 0.349947 0.284295
4 2023-07-15 0.572973 0.515143 0.499026 0.550076 0.550768 0.460477 0.225505 0.478275 0.333824
... ... ... ... ... ... ... ... ... ... ...
163 2023-09-02 0.331751 0.273870 0.252772 0.315278 0.250863 0.163495 0.206029 0.343418 0.262445
164 2023-09-03 0.253077 0.223345 0.258903 0.295469 0.289489 0.261679 0.284491 0.301202 0.263286
165 2023-09-04 0.284961 0.200487 0.221326 0.324679 0.294298 0.254948 0.422089 0.328173 0.222025
166 2023-09-05 0.376310 0.339444 0.321259 0.406183 0.381957 0.062036 0.235567 0.484579 -0.211019
167 2023-09-06 0.242600 0.215008 0.229029 0.244884 0.185244 0.396192 0.361900 0.446674 0.201929

168 rows × 10 columns

cv_df_plot = long_form(cv_df_filter)
cv_df_plot
ds unique_id y
0 2023-07-11 RNN 0.435481
1 2023-07-12 RNN 0.571911
2 2023-07-13 RNN 0.712023
3 2023-07-14 RNN 0.549668
4 2023-07-15 RNN 0.572973
... ... ... ...
1507 2023-09-02 NBEATSx 0.262445
1508 2023-09-03 NBEATSx 0.263286
1509 2023-09-04 NBEATSx 0.222025
1510 2023-09-05 NBEATSx -0.211019
1511 2023-09-06 NBEATSx 0.201929

1512 rows × 3 columns

altair_plot(cv_df_plot)
Back to top